
R version 4.5.2 (2025-10-31) -- "[Not] Part in a Rumble"
Copyright (C) 2025 The R Foundation for Statistical Computing
Platform: aarch64-apple-darwin20

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> # Appendix
> library(tidyverse)
-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr     1.1.4     v readr     2.1.6
v forcats   1.0.1     v stringr   1.6.0
v ggplot2   4.0.1     v tibble    3.3.0
v lubridate 1.9.4     v tidyr     1.3.1
v purrr     1.2.0     
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
> library(xtable)
> library(stargazer)

Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 

> library(monomvn)
Loading required package: pls

Attaching package: 'pls'

The following object is masked from 'package:stats':

    loadings

Loading required package: lars
Loaded lars 1.3

Loading required package: MASS

Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select

> library(ggthemes)
> library(broom)
> library(dplyr)
> library(tidyr)
> library(scales)

Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor

> 
> ##########################################################
> # D. Survey Demographics
> ##########################################################
> 
> source('Code/merge_waves_marijuana.R')

Attaching package: 'data.table'

The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year

The following objects are masked from 'package:dplyr':

    between, first, last

The following object is masked from 'package:purrr':

    transpose

> source('Code/merge_waves_tech.R')
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

Registered S3 method overwritten by 'lfe':
  method    from 
  nobs.felm broom
Joining with `by = join_by(PID)`
Joining with `by = join_by(PID)`
Joining with `by = join_by(PIDw1)`
Joining with `by = join_by(PIDw1, wave)`
Joining with `by = join_by(PID)`
Joining with `by = join_by(PID)`
Joining with `by = join_by(PIDw1)`
Joining with `by = join_by(PIDw1, wave)`
> 
> # Process Experiment 1 (Marijuana) data
> mar <- df
> mar$racew1[grep(',', mar$racew1)] <- 'Multi-Racial'
> mar$racew1 <- recode(mar$racew1, '1'='Black', '2'='Asian and Pacific Islander', '3'='Non-Hispanic White', '4'= 'Hispanic', '5'='Native American', '6'='Other', '7'='Other')
> mar$genderw1 <- recode(mar$genderw1, '1'='Male', '2'='Female')
> mar$genderw1 <- factor(mar$genderw1, levels=c('Male', "Female", 'Other'))
> mar$eduw1 <- recode(mar$educw1, '1'='Did Not Graduate High School', '2' ='High School Graduate', '3' = 'Some College', '4'='2 Year Degree', '5'='4 Year Degree', '6'='Post Graduate Degree')
> mar$eduw1 <- factor(mar$eduw1, levels=c('Did Not Graduate High School', 'High School Graduate', 'Some College', '2 Year Degree', '4 Year Degree', 'Post Graduate Degree'))
> mar$pid3w1 <- recode(mar$party1w1, '1'='Democrat', '2'='Republican', '3'='Independent/ Third Party', '4'="Independent/ Third Party")
> mar$pid3_leanerw1 <- ifelse(mar$party1w1 == 1, 'Democrat',
+                             ifelse(mar$party1w1 == 2, 'Republican',
+                                    ifelse(mar$party1w1 %in% c(3, 4) & mar$party4w1 == 1, 'Republican',
+                                           ifelse(mar$party1w1 %in% c(3, 4) & mar$party4w1 == 2, 'Democrat',
+                                                  ifelse(mar$party1w1 %in% c(3, 4) & mar$party4w1 == 3, 'Independent', NA)))))
> 
> mar$pid3_leanerw1 <- factor(mar$pid3_leanerw1, levels = c('Democrat', 'Republican', 'Independent'))
> 
> 
> # Process Experiment 2 and 3 (Tech) data
> # Collapse the wave-1 race indicator columns of `race_df` into a single label
> # per row. Assignments are applied in sequence, so a later match overrides an
> # earlier one, and any row whose indicators sum to more than one is finally
> # labelled 'Multi-Racial'. Rows matching nothing stay NA.
> # NOTE(review): the indicators are used directly as subscripts, which assumes
> # they are logical columns -- confirm against the merge scripts.
> get_race_vec <- function(race_df){
+   indicator_cols <- c('whitew1', 'blackw1', 'hispanicw1', 'asianw1', 'native.americanw1', 'middle.easternw1', 'otherw1')
+   labels <- rep(NA, nrow(race_df))
+   labels[race_df$whitew1] <- 'Non-Hispanic White'
+   labels[race_df$blackw1] <- 'Black'
+   labels[race_df$hispanicw1] <- 'Hispanic'
+   labels[race_df$asianw1] <- 'Asian and Pacific Islander'
+   labels[race_df$native.americanw1] <- 'Native American'
+   # Middle Eastern and "other" respondents share one category.
+   is_other <- race_df$middle.easternw1 | race_df$otherw1
+   labels[is_other] <- 'Other'
+   # More than one indicator selected overrides every single-race label.
+   n_selected <- rowSums(race_df[, indicator_cols])
+   labels[n_selected > 1] <- 'Multi-Racial'
+   labels
+ }
> 
> # Harmonise the wave-1 demographics of a tech-experiment data frame:
> #   * racew1   - single race label built by get_race_vec()
> #   * eduw1    - codes 1-6 recoded to labels, stored as a factor in that order
> #   * genderw1 - factor with levels Male, Female, Other
> #   * pid3w1   - 'Independent' and 'Other Party' folded into
> #                'Independent/ Third Party'
> # Returns the modified data frame.
> process_tech_data <- function(df) {
+   edu_order <- c('Did Not Graduate High School', 'High School Graduate', 'Some College', '2 Year Degree', '4 Year Degree', 'Post Graduate Degree')
+   third_party <- 'Independent/ Third Party'
+   
+   df$racew1 <- get_race_vec(df)
+   
+   edu_labels <- recode(df$eduw1, '1'='Did Not Graduate High School', '2' ='High School Graduate', '3' = 'Some College', '4'='2 Year Degree', '5'='4 Year Degree', '6'='Post Graduate Degree')
+   df$eduw1 <- factor(edu_labels, levels=edu_order)
+   
+   df$genderw1 <- factor(df$genderw1, levels=c('Male', "Female", 'Other'))
+   
+   df$pid3w1[df$pid3w1 == 'Independent'] <- third_party
+   df$pid3w1[df$pid3w1 == 'Other Party'] <- third_party
+   df
+ }
> 
> text <- process_tech_data(text)
> vid <- process_tech_data(vid)
> 
> # Tabulate the rounded (2 d.p.) category shares of three vectors side by side,
> # one column per experiment, and return them as a data frame whose first
> # column, named by `var_name`, holds the category labels.
> create_summary_table <- function(var1, var2, var3, var_name) {
+   shares <- lapply(list(var1, var2, var3), function(vec) {
+     round(prop.table(table(vec)), 2)
+   })
+   
+   share_mat <- do.call(cbind, shares)
+   colnames(share_mat) <- c('Experiment 1', 'Experiment 2', 'Experiment 3')
+   
+   # The category labels arrive as row names; move them into a real column.
+   tibble::rownames_to_column(as.data.frame(share_mat), var = var_name)
+ }
> 
> 
> # Build the covariate balance table for one experiment. For education, gender,
> # leaner party ID and race it reports the rounded share of every category
> # within the Control, Fox and MSNBC arms of `treat_var`, plus the pooled share
> # (`Total`). `data` must contain eduw1, genderw1, pid3_leanerw1, racew1 and the
> # column named by `treat_var`. The demographic name is shown only on the first
> # row of each group.
> create_balance_table <- function(data, treat_var) {
+   # Treatment values outside these three levels become NA in the factor.
+   data[[treat_var]] <- factor(data[[treat_var]], levels = c("Control", "Fox", "MSNBC"))
+   
+   educ_levels <- c("Did Not Graduate High School", "High School Graduate", "Some College",
+                    "2 Year Degree", "4 Year Degree", "Post Graduate Degree")
+   gender_levels <- c("Male", "Female", "Other")
+   party_levels  <- c("Democrat", "Republican", "Independent")
+   race_levels   <- c("Non-Hispanic White", "Black", "Hispanic", "Asian and Pacific Islander",
+                      "Native American", "Multi-Racial", "Other")
+   
+   # Shares of `var` by treatment arm (wide) joined to the pooled shares.
+   # Only rows whose value is listed in `levels_order` are counted.
+   get_balance <- function(data, var, treat_var, levels_order) {
+     var_sym <- sym(var)
+     treat_sym <- sym(treat_var)
+     kept <- data %>% filter(!!var_sym %in% levels_order)
+     
+     by_arm <- kept %>%
+       count(!!treat_sym, !!var_sym) %>%
+       group_by(!!treat_sym) %>%
+       mutate(prop = round(n / sum(n), 2)) %>%
+       ungroup() %>%
+       pivot_wider(
+         id_cols = !!var_sym,
+         names_from = !!treat_sym,
+         values_from = prop,
+         values_fill = list(prop = 0)
+       ) %>%
+       mutate(!!var := factor(!!var_sym, levels = levels_order)) %>%
+       arrange(!!var_sym) %>%
+       rename(Category = !!var_sym)
+     
+     pooled <- kept %>%
+       count(!!var_sym) %>%
+       mutate(Total = round(n / sum(n), 2)) %>%
+       mutate(!!var := factor(!!var_sym, levels = levels_order)) %>%
+       arrange(!!var_sym) %>%
+       rename(Category = !!var_sym) %>%
+       dplyr::select(Category, Total)
+     
+     left_join(by_arm, pooled, by = "Category")
+   }
+   
+   balance_table <- bind_rows(
+     get_balance(data, "eduw1", treat_var, educ_levels) %>% mutate(Demographic = "Education"),
+     get_balance(data, "genderw1", treat_var, gender_levels) %>% mutate(Demographic = "Gender"),
+     get_balance(data, "pid3_leanerw1", treat_var, party_levels) %>% mutate(Demographic = "Party"),
+     get_balance(data, "racew1", treat_var, race_levels) %>% mutate(Demographic = "Race")
+   )
+   
+   # An arm with no observations produces no column; add it as zeros so the
+   # table always has the same layout.
+   cols_needed <- c("Demographic", "Category", "Control", "Fox", "MSNBC", "Total")
+   missing_cols <- setdiff(cols_needed, names(balance_table))
+   for (col in missing_cols) {
+     balance_table[[col]] <- 0
+   }
+   balance_table <- balance_table[, cols_needed]
+   
+   # Blank out the repeated demographic name on all but the first row per group.
+   balance_table %>%
+     group_by(Demographic) %>%
+     mutate(Demographic = if_else(row_number() == 1, Demographic, "")) %>%
+     ungroup()
+ }
> 
> # --- Experiment 1 ---
> mar_forced <- mar %>% 
+   filter(forcedchoicew1 == 1, forcedchoicew2 == 0) %>%
+   mutate(treatment = case_when(
+     foxw1 == 1 ~ "Fox",
+     msnbcw1 == 1 ~ "MSNBC",
+     TRUE ~ "Control"
+   ))
> 
> balance_exp1 <- create_balance_table(mar_forced, "treatment")
> 
> # --- Experiment 2 ---
> text <- text %>%
+   mutate(treatmentw1 = recode(treatmentw1,
+                               "anti" = "Fox",
+                               "pro" = "MSNBC",
+                               "placebo" = "Control"))
> 
> balance_exp2 <- create_balance_table(text, "treatmentw1")
> 
> # --- Experiment 3 ---
> vid <- vid %>%
+   mutate(treatmentw1 = recode(treatmentw1,
+                               "anti" = "Fox",
+                               "pro" = "MSNBC",
+                               "placebo" = "Control"))
> 
> balance_exp3 <- create_balance_table(vid, "treatmentw1")
> 
> print(xtable(balance_exp1, caption="Balance Table for Experiment 1", label="tab:exp1_balance"),
+       file="Output/tableA4_exp1.tex", include.rownames=FALSE)
> 
> print(xtable(balance_exp2, caption="Balance Table for Experiment 2", label="tab:exp2_balance"),
+       file="Output/tableA5_exp2.tex", include.rownames=FALSE)
> 
> print(xtable(balance_exp3, caption="Balance Table for Experiment 3", label="tab:exp3_balance"),
+       file="Output/tableA6_exp3.tex", include.rownames=FALSE)
> 
> 
> ##########################################################
> # H. Treatment effects for individual questions
> ##########################################################
> 
> ### Experiment 1
> 
> df_forced <- df %>% filter(forcedchoicew1 == 1, forcedchoicew2==0)
> 
> outcomes <- c("mar_tradeoff", "mar_econ", "mar_costmore", "mar_fewserious",
+               "mar_wrong", "mar_violence", "mar_legmed", "mar_serious",
+               "mar_legrec", "danger_mar")
> 
> outcome_labels <- c("whether drug use is a health problem v criminal issue", 
+                     "whether marijuana legalization makes the economy better", 
+                     "whether government efforts to enforce marijuana laws cost more than they are worth", 
+                     "whether the legalization of marijuana leads to fewer people using more serious drugs, such as heroin and cocaine",
+                     "whether marijuana use is morally wrong", 
+                     "whether marijuana use increases violent crime", 
+                     "whether marijuana should be legal for medical use",
+                     "whether marijuana use is a serious problem today",
+                     "whether marijuana should be legal for recreational use", 
+                     "how dangerous is marijuana")
> 
> # Experiment 1: regress the wave-1 and wave-2 versions of one outcome on the
> # Fox and MSNBC indicators (data: the global `df_forced`) and let stargazer
> # write the two-column LaTeX table to Output/Individual_questions/<outcome>.tex.
> # Returns the LaTeX lines captured from stargazer.
> create_and_print_table <- function(outcome, outcome_label) {
+   model_w1 <- lm(as.formula(paste0(outcome, "w1 ~ foxw1 + msnbcw1")), data = df_forced)
+   model_w2 <- lm(as.formula(paste0(outcome, "w2 ~ foxw1 + msnbcw1")), data = df_forced)
+   
+   tex_path    <- paste0("Output/Individual_questions/", outcome, ".tex")
+   table_title <- paste("Effect of treatment on", outcome_label)
+   table_label <- paste0("tab:", outcome)
+   star_note   <- "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"
+   
+   # capture.output() collects what stargazer prints instead of echoing it.
+   capture.output(
+     stargazer(model_w1, model_w2,
+               type = "latex",
+               header = FALSE,
+               out = tex_path,
+               title = table_title,
+               label = table_label,
+               column.labels = c("Wave 1", "Wave 2"),
+               covariate.labels = c("Fox", "MSNBC"),
+               model.names = FALSE,
+               dep.var.labels = NULL,
+               dep.var.labels.include = FALSE,
+               omit = "Constant",
+               omit.stat = c("f", "ser"),
+               keep.stat = c("n", "rsq", "adj.rsq"),
+               omit.table.layout = "m",
+               star.cutoffs = c(0.1, 0.05, 0.01),
+               notes = star_note,
+               notes.append = FALSE
+     )
+   )
+ }
> walk2(outcomes, outcome_labels, create_and_print_table)
> 
> 
> ### Experiment 2
> 
> outcomes <- c("scale", "censorship", "privacy", "congress", "influence", "fav_tech")
> outcome_labels <- c("whether the size of big tech is good for consumers",
+                     "whether social networks should remove more false, offensive, misleading, and harmful content", 
+                     "whether big tech does a good job of keeping user info secure", 
+                     "whether congress should do more to regulate how big tech gather data", 
+                     "whether big tech companies exert too much influence over the political life in america",
+                     "favorability of big tech companies")
> 
> # Experiment 2 (text): regress the wave-1 and wave-2 versions of one outcome on
> # `treatmentw1` (data: the global `text`) and let stargazer write the
> # two-column LaTeX table to Output/Individual_questions/<outcome>_text.tex.
> # Returns the LaTeX lines captured from stargazer.
> create_and_print_table <- function(outcome, outcome_label) {
+   model_w1 <- lm(as.formula(paste0(outcome, "w1 ~ treatmentw1")), data = text)
+   model_w2 <- lm(as.formula(paste0(outcome, "w2 ~ treatmentw1")), data = text)
+   
+   tex_path    <- paste0("Output/Individual_questions/", outcome, "_text.tex")
+   table_title <- paste("Effect of treatment on", outcome_label)
+   table_label <- paste0("tab:text_", outcome)
+   star_note   <- "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"
+   
+   # capture.output() collects what stargazer prints instead of echoing it.
+   capture.output(
+     stargazer(model_w1, model_w2,
+               type = "latex",
+               header = FALSE,
+               out = tex_path,
+               title = table_title,
+               label = table_label,
+               column.labels = c("Wave 1", "Wave 2"),
+               covariate.labels = c("Fox", "MSNBC"),
+               model.names = FALSE,
+               dep.var.labels = NULL,
+               dep.var.labels.include = FALSE,
+               omit = "Constant",
+               omit.stat = c("f", "ser"),
+               keep.stat = c("n", "rsq", "adj.rsq"),
+               star.cutoffs = c(0.1, 0.05, 0.01),
+               notes = star_note,
+               notes.append = FALSE
+     )
+   )
+ }
> walk2(outcomes, outcome_labels, create_and_print_table)
> 
> ### Experiment 3
> 
> outcomes <- c("scale", "censorship", "privacy", "congress", "influence", "fav_tech")
> outcome_labels <- c("whether the size of big tech is good for consumers",
+                     "whether social networks should remove more false, offensive, misleading, and harmful content", 
+                     "whether big tech does a good job of keeping user info secure", 
+                     "whether congress should do more to regulate how big tech gather data", 
+                     "whether big tech companies exert too much influence over the political life in america",
+                     "favorability of big tech companies")
> 
> # Experiment 3 (video): regress the wave-1 and wave-2 versions of one outcome
> # on `treatmentw1` (data: the global `vid`) and let stargazer write the
> # two-column LaTeX table to Output/Individual_questions/<outcome>_vid.tex.
> # Returns the LaTeX lines captured from stargazer.
> create_and_print_table <- function(outcome, outcome_label) {
+   model_w1 <- lm(as.formula(paste0(outcome, "w1 ~ treatmentw1")), data = vid)
+   model_w2 <- lm(as.formula(paste0(outcome, "w2 ~ treatmentw1")), data = vid)
+   # capture.output() collects what stargazer prints instead of echoing it.
+   capture.output(
+     stargazer(model_w1, model_w2,
+               type = "latex",
+               out = paste0("Output/Individual_questions/", outcome, "_vid.tex"),
+               title = paste("Effect of treatment on", outcome_label),
+               # Use a video-specific prefix: "tab:text_" is already taken by
+               # the Experiment 2 tables and would give duplicate LaTeX labels.
+               label = paste0("tab:vid_", outcome),
+               column.labels = c("Wave 1", "Wave 2"),
+               model.names = FALSE,
+               dep.var.labels = NULL,
+               covariate.labels = c("Fox", "MSNBC"),
+               omit.stat = c("f", "ser"),
+               star.cutoffs = c(0.1, 0.05, 0.01),
+               notes = "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01",
+               notes.append = FALSE,
+               dep.var.labels.include = FALSE,
+               header = FALSE,
+               omit = "Constant",
+               keep.stat = c("n", "rsq", "adj.rsq")
+     )
+   )
+ }
> walk2(outcomes, outcome_labels, create_and_print_table)
> 
> 
> 
> ##########################################################
> # E. Attrition Rates across Experiments 1–3
> ##########################################################
> 
> # --- Attrition: Experiment 1 (Marijuana) ---
> exp1_attr <- mar %>%
+   filter(forcedchoicew1 == 1) %>%
+   transmute(
+     Experiment = 1,
+     treatment = case_when(
+       foxw1 == 1    ~ "Fox",
+       msnbcw1 == 1  ~ "MSNBC",
+       TRUE          ~ "Control"
+     ),
+     resp_w1 = !is.na(PIDw1),
+     resp_w2 = !is.na(Progressw2),
+     resp_w3 = !is.na(Progressw3)
+   )
> 
> 
> # --- Attrition: Experiment 2 (Text) ---
> exp2_attr <- text %>%
+   transmute(
+     Experiment = 2,
+     treatment = factor(
+       case_when(
+         treatmentw1 %in% c("Control","Fox","MSNBC") ~ treatmentw1,
+         treatmentw1 == "placebo" ~ "Control",
+         treatmentw1 == "anti"    ~ "Fox",
+         treatmentw1 == "pro"     ~ "MSNBC",
+         TRUE ~ NA_character_
+       ),
+       levels = c("Control","Fox","MSNBC")
+     ),
+     resp_w1 = !is.na(EndDatew1),
+     resp_w2 = !is.na(EndDatew2),
+     resp_w3 = NA_real_
+   )
> 
> 
> # --- Attrition: Experiment 3 (Video) ---
> exp3_attr <- vid %>%
+   transmute(
+     Experiment = 3,
+     treatment = factor(
+       case_when(
+         treatmentw1 %in% c("Control","Fox","MSNBC") ~ treatmentw1,
+         treatmentw1 == "placebo" ~ "Control",
+         treatmentw1 == "anti"    ~ "Fox",
+         treatmentw1 == "pro"     ~ "MSNBC",
+         TRUE ~ NA_character_
+       ),
+       levels = c("Control","Fox","MSNBC")
+     ),
+     resp_w1 = !is.na(EndDatew1),
+     resp_w2 = !is.na(EndDatew2),
+     resp_w3 = NA_real_
+   )
> 
> 
> attrition_all <- bind_rows(exp1_attr, exp2_attr, exp3_attr) %>%
+   filter(!is.na(treatment)) %>%     
+   group_by(Experiment, treatment) %>%
+   summarise(
+     `$N_{wave1}$` = sum(resp_w1, na.rm = TRUE),
+     `$N_{wave2}$` = sum(resp_w2, na.rm = TRUE),
+     `$N_{wave3}$` = if_else(first(Experiment) == 1,
+                             sum(resp_w3, na.rm = TRUE),
+                             NA_real_),
+     .groups = "drop"
+   ) %>%
+   mutate(
+     `Attrition Rate` = if_else(
+       Experiment == 1,
+       round((`$N_{wave1}$` - `$N_{wave3}$`) / `$N_{wave1}$`, 2),
+       round((`$N_{wave1}$` - `$N_{wave2}$`) / `$N_{wave1}$`, 2)
+     )
+   ) %>%
+   arrange(Experiment, treatment) %>%
+   rename(Treatment = treatment) %>%
+   distinct() %>%        
+   mutate(
+     Experiment     = as.integer(Experiment),
+     `$N_{wave1}$`  = as.integer(`$N_{wave1}$`),
+     `$N_{wave2}$`  = as.integer(`$N_{wave2}$`),
+     `$N_{wave3}$`  = as.integer(`$N_{wave3}$`)
+   )
> 
> print(
+   xtable(
+     attrition_all,
+     caption = "Sample Sizes and Attrition Rates by Experiment and Treatment",
+     label   = "tab:all_attrition",
+     digits  = c(0, 0, 0, 0, 0, 0, 2)
+   ),
+   file             = "Output/tableA7_All_Attrition.tex",
+   include.rownames = FALSE
+ )
> 
> 
> 
> 
> ##########################################################
> # E. Baseline Demographics by Wave Participation
> #    for Experiments 1, 2, and 3
> ##########################################################
> 
> # For the column named by `var`, compute the rounded (2 d.p.) share of each
> # category within every value of `wave`, keeping only rows whose value is in
> # `levels`. Returns one row per category (column `Category`) and one column
> # per wave; category/wave combinations that never occur are filled with 0.
> make_wave_balance <- function(df, var, levels) {
+   var_sym <- rlang::sym(var)
+   
+   kept   <- dplyr::filter(df, !!var_sym %in% levels)
+   counts <- dplyr::count(kept, wave, !!var_sym)
+   
+   shares <- counts %>%
+     dplyr::group_by(wave) %>%
+     dplyr::mutate(prop = round(n / sum(n), 2)) %>%
+     dplyr::ungroup() %>%
+     dplyr::select(!!var_sym, wave, prop)
+   
+   wide <- tidyr::pivot_wider(
+     shares,
+     id_cols     = !!var_sym,
+     names_from  = wave,
+     values_from = prop,
+     values_fill = list(prop = 0)
+   )
+   
+   dplyr::rename(wide, Category = !!var_sym)
+ }
> 
> educ_lvls   <- c("Did Not Graduate High School","High School Graduate","Some College",
+                  "2 Year Degree","4 Year Degree","Post Graduate Degree")
> gender_lvls <- c("Male","Female")
> party_lvls  <- c("Democrat","Republican","Independent")
> race_lvls   <- c("Non-Hispanic White","Black","Hispanic","Asian and Pacific Islander",
+                  "Native American","Multi-Racial","Other")
> 
> # --- Experiment 1 (Marijuana) ---
> wave1_df1 <- mar %>%
+   filter(forcedchoicew1 == 1, !is.na(PIDw1)) %>%
+   transmute(wave = "Wave 1",
+             edu    = eduw1,
+             gender = genderw1,
+             race   = racew1,
+             party  = pid3_leanerw1)
> 
> wave2_df1 <- mar %>%
+   filter(forcedchoicew1 == 1, !is.na(Progressw2)) %>%
+   transmute(wave = "Wave 2",
+             edu    = eduw1,
+             gender = genderw1,
+             race   = racew1,
+             party  = pid3_leanerw1)
> 
> wave3_df1 <- mar %>%
+   filter(forcedchoicew1 == 1, !is.na(Progressw3)) %>%
+   transmute(wave = "Wave 3",
+             edu    = eduw1,
+             gender = genderw1,
+             race   = racew1,
+             party  = pid3_leanerw1)
> 
> balance_waves1 <- bind_rows(wave1_df1, wave2_df1, wave3_df1) %>%
+   mutate(wave = factor(wave, levels = c("Wave 1", "Wave 2", "Wave 3")))
> 
> edu_bal1    <- make_wave_balance(balance_waves1, "edu",    educ_lvls)   %>% mutate(Demographic="Education")
> gender_bal1 <- make_wave_balance(balance_waves1, "gender", gender_lvls)%>% mutate(Demographic="Gender")
> party_bal1  <- make_wave_balance(balance_waves1, "party",  party_lvls) %>% mutate(Demographic="Party")
> race_bal1   <- make_wave_balance(balance_waves1, "race",   race_lvls)  %>% mutate(Demographic="Race")
> 
> baseline_demo_1 <- bind_rows(edu_bal1, gender_bal1, party_bal1, race_bal1) %>%
+   dplyr::select(Demographic, Category, `Wave 1`, `Wave 2`, `Wave 3`) %>%
+   dplyr::group_by(Demographic) %>%
+   dplyr::mutate(
+     Demographic = if_else(dplyr::row_number() == 1, Demographic, "")
+   ) %>%
+   dplyr::ungroup()
> 
> 
> print(
+   xtable(
+     baseline_demo_1,
+     caption = "Demographics by Wave in Experiment 1",
+     label   = "tab:baseline_demo_wave_exp1"
+   ),
+   file             = "Output/tableA8_Baseline_Demo_Waves_Exp1.tex",
+   include.rownames = FALSE
+ )
> 
> 
> # --- Experiment 2 (Text) ---
> wave1_df2 <- text %>%
+   filter(!is.na(EndDatew1)) %>%
+   transmute(wave = "Wave 1",
+             edu    = eduw1,
+             gender = genderw1,
+             race   = racew1,
+             party  = pid3_leanerw1)
> 
> wave2_df2 <- text %>%
+   filter(!is.na(EndDatew2)) %>%
+   transmute(wave = "Wave 2",
+             edu    = eduw1,
+             gender = genderw1,
+             race   = racew1,
+             party  = pid3_leanerw1)
> 
> balance_waves2 <- bind_rows(wave1_df2, wave2_df2) %>%
+   mutate(wave = factor(wave, levels = c("Wave 1", "Wave 2")))
> 
> edu_bal2    <- make_wave_balance(balance_waves2, "edu",    educ_lvls)   %>% mutate(Demographic="Education")
> gender_bal2 <- make_wave_balance(balance_waves2, "gender", gender_lvls)%>% mutate(Demographic="Gender")
> party_bal2  <- make_wave_balance(balance_waves2, "party",  party_lvls) %>% mutate(Demographic="Party")
> race_bal2   <- make_wave_balance(balance_waves2, "race",   race_lvls)  %>% mutate(Demographic="Race")
> 
> baseline_demo_2 <- bind_rows(edu_bal2, gender_bal2, party_bal2, race_bal2) %>%
+   dplyr::select(Demographic, Category, `Wave 1`, `Wave 2`) %>%
+   dplyr::group_by(Demographic) %>%
+   dplyr::mutate(Demographic = if_else(dplyr::row_number()==1, Demographic, "")) %>%
+   dplyr::ungroup()
> 
> 
> print(
+   xtable(
+     baseline_demo_2,
+     caption = "Demographics by Wave in Experiment 2 (Text)",
+     label   = "tab:baseline_demo_wave_exp2"
+   ),
+   file             = "Output/tableA9_Baseline_Demo_Waves_Exp2.tex",
+   include.rownames = FALSE
+ )
> 
> 
> # --- Experiment 3 (Video) ---
> wave1_df3 <- vid %>%
+   filter(!is.na(EndDatew1)) %>%
+   transmute(wave = "Wave 1",
+             edu    = eduw1,
+             gender = genderw1,
+             race   = racew1,
+             party  = pid3_leanerw1)
> 
> wave2_df3 <- vid %>%
+   filter(!is.na(EndDatew2)) %>%
+   transmute(wave = "Wave 2",
+             edu    = eduw1,
+             gender = genderw1,
+             race   = racew1,
+             party  = pid3_leanerw1)
> 
> balance_waves3 <- bind_rows(wave1_df3, wave2_df3) %>%
+   mutate(wave = factor(wave, levels = c("Wave 1", "Wave 2")))
> 
> edu_bal3    <- make_wave_balance(balance_waves3, "edu",    educ_lvls)   %>% mutate(Demographic="Education")
> gender_bal3 <- make_wave_balance(balance_waves3, "gender", gender_lvls)%>% mutate(Demographic="Gender")
> party_bal3  <- make_wave_balance(balance_waves3, "party",  party_lvls) %>% mutate(Demographic="Party")
> race_bal3   <- make_wave_balance(balance_waves3, "race",   race_lvls)  %>% mutate(Demographic="Race")
> 
> baseline_demo_3 <- bind_rows(edu_bal3, gender_bal3, party_bal3, race_bal3) %>%
+   dplyr::select(Demographic, Category, `Wave 1`, `Wave 2`) %>%
+   dplyr::group_by(Demographic) %>%
+   dplyr::mutate(Demographic = if_else(dplyr::row_number()==1, Demographic, "")) %>%
+   dplyr::ungroup()
> 
> print(
+   xtable(
+     baseline_demo_3,
+     caption = "Demographics by Wave in Experiment 3 (Video)",
+     label   = "tab:baseline_demo_wave_exp3"
+   ),
+   file             = "Output/tableA10_Baseline_Demo_Waves_Exp3.tex",
+   include.rownames = FALSE
+ )
> 
> 
> ##########################################################
> # F. Wave 2 Choice in Experiment 1
> ##########################################################
> w1 <- read_csv("Data/MediaSSI_Dec2017_w1_recoded.csv")
Rows: 7394 Columns: 35
-- Column specification --------------------------------------------------------
Delimiter: ","
chr  (7): StartDate, EndDate, ResponseId, med_pref, med_choice, PID, article...
dbl (27): X, Progress, consent, gender, educ, income, party1, party4, forced...
num  (1): race

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
> w2 <- read_csv("Data/MediaSSI_Dec2017_w2_recoded.csv")
Rows: 4927 Columns: 24
-- Column specification --------------------------------------------------------
Delimiter: ","
chr  (7): StartDate, EndDate, ResponseId, med_pref, med_choice, PID, article...
dbl (17): Progress, msnbc, fox, entertainment, forcedchoice, pid, mar_costmo...

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
> w3 <- read_csv("Data/MediaSSI_Dec2017_w3_recoded.csv")
Rows: 4527 Columns: 17
-- Column specification --------------------------------------------------------
Delimiter: ","
chr  (5): StartDate, EndDate, ResponseId, med_pref, PID
dbl (12): Progress, mar_costmore, mar_fewserious, mar_wrong, mar_violence, m...

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
> 
> # Keep one row per PID: rows with a missing `prog` value are dropped, and of
> # the remaining rows the one with the highest `prog` is retained.
> keep_best <- function(df, prog) {
+   ranked <- df %>%
+     filter(!is.na(.data[[prog]])) %>%
+     arrange(PID, desc(.data[[prog]]))
+   
+   # After sorting, the first row within each PID has the largest `prog`.
+   ranked %>%
+     group_by(PID) %>%
+     slice_head(n = 1) %>%
+     ungroup()
+ }
> 
> w1u <- keep_best(w1, "Progress")
> w2u <- keep_best(w2, "Progress")
> w3u <- keep_best(w3, "Progress")
> 
> w123 <- w1u %>%
+   left_join(w2u, by = "PID", suffix = c("", "_w2")) %>%
+   left_join(w3u, by = "PID", suffix = c("", "_w3"))
> 
> analysis_data <- w123 %>%
+   filter(forcedchoice == 1 & forcedchoice_w2 == 0) %>%
+   mutate(
+     wave1_treatment = case_when(
+       fox == 1 ~ "Fox",
+       msnbc == 1 ~ "MSNBC",
+       entertainment == 1 ~ "Entertainment",
+       TRUE ~ NA_character_
+     ),
+     wave2_choice = article_read_w2
+   ) %>%
+   filter(!is.na(wave1_treatment) & !is.na(wave2_choice) & 
+            wave2_choice %in% c("Fox", "MSNBC", "Entertainment"))
> 
> cat(sprintf("Analyzing %d respondents\n\n", nrow(analysis_data)))
Analyzing 1148 respondents

> 
> flow_data <- analysis_data %>%
+   count(wave1_treatment, wave2_choice, name = "freq") %>%
+   group_by(wave1_treatment) %>%
+   mutate(
+     total_wave1 = sum(freq),
+     pct_within_treatment = freq / total_wave1 * 100
+   ) %>%
+   ungroup()
> 
> 
> 
> # Summary table
> summary_table <- flow_data %>%
+   dplyr::select(wave1_treatment, wave2_choice, freq, pct_within_treatment) %>%
+   arrange(wave1_treatment, desc(pct_within_treatment)) %>%
+   mutate(pct_within_treatment = round(pct_within_treatment, 1))
> 
> cat("Summary of Wave 2 Choices by Wave 1 Treatment:\n")
Summary of Wave 2 Choices by Wave 1 Treatment:
> cat("=============================================\n\n")
=============================================

> 
> # Print, for each wave-1 treatment, the number of respondents and how they
> # split across the wave-2 choices (rows come pre-sorted from summary_table).
> for(treatment in c("Entertainment", "Fox", "MSNBC")) {
+   cat(paste0("Wave 1 Treatment: ", treatment, "\n"))
+   cat("-------------------\n")
+   
+   treatment_data <- summary_table %>% 
+     filter(wave1_treatment == treatment)
+   
+   total_n <- sum(treatment_data$freq)
+   cat(sprintf("  Total respondents: %d\n", total_n))
+   
+   # seq_len() gives an empty sequence when a treatment has no rows, whereas
+   # 1:nrow() would iterate over c(1, 0) and index rows that do not exist.
+   for(i in seq_len(nrow(treatment_data))) {
+     cat(sprintf("    Chose %s in Wave 2: %d respondents (%.1f%%)\n",
+                 treatment_data$wave2_choice[i],
+                 treatment_data$freq[i],
+                 treatment_data$pct_within_treatment[i]))
+   }
+   cat("\n")
+ }
Wave 1 Treatment: Entertainment
-------------------
  Total respondents: 381
    Chose Fox in Wave 2: 144 respondents (37.8%)
    Chose Entertainment in Wave 2: 126 respondents (33.1%)
    Chose MSNBC in Wave 2: 111 respondents (29.1%)

Wave 1 Treatment: Fox
-------------------
  Total respondents: 404
    Chose Fox in Wave 2: 150 respondents (37.1%)
    Chose Entertainment in Wave 2: 131 respondents (32.4%)
    Chose MSNBC in Wave 2: 123 respondents (30.4%)

Wave 1 Treatment: MSNBC
-------------------
  Total respondents: 363
    Chose Fox in Wave 2: 150 respondents (41.3%)
    Chose MSNBC in Wave 2: 115 respondents (31.7%)
    Chose Entertainment in Wave 2: 98 respondents (27.0%)

> 
> # Statistical test for independence
> cat("\nChi-square test of independence:\n")

Chi-square test of independence:
> cat("================================\n")
================================
> 
> # Contingency table
> cont_table <- table(analysis_data$wave1_treatment, analysis_data$wave2_choice)
> 
> # Chi-square test
> chi_test <- chisq.test(cont_table)
> cat(sprintf("Chi-square statistic: %.2f\n", chi_test$statistic))
Chi-square statistic: 4.05
> cat(sprintf("Degrees of freedom: %d\n", chi_test$parameter))
Degrees of freedom: 4
> cat(sprintf("P-value: %.4f\n", chi_test$p.value))
P-value: 0.3988
> 
> # Calculate "stickiness"
> stickiness <- flow_data %>%
+   filter(wave1_treatment == wave2_choice) %>%
+   group_by(wave1_treatment) %>%
+   summarise(
+     stayed_same = sum(freq),
+     .groups = 'drop'
+   ) %>%
+   left_join(
+     flow_data %>%
+       group_by(wave1_treatment) %>%
+       summarise(total = sum(freq), .groups = 'drop'),
+     by = "wave1_treatment"
+   ) %>%
+   mutate(
+     pct_stayed = stayed_same / total * 100
+   )
> 
> cat("\n\n'Stickiness' - Respondents who chose same outlet in Wave 2:\n")


'Stickiness' - Respondents who chose same outlet in Wave 2:
> cat("========================================================\n")
========================================================
> # One line per wave-1 treatment: share who picked the same outlet in wave 2.
> # seq_len() keeps the loop from running when `stickiness` has no rows
> # (1:nrow() would iterate over c(1, 0)).
> for(i in seq_len(nrow(stickiness))) {
+   cat(sprintf("%s: %.1f%% stayed with %s\n", 
+               stickiness$wave1_treatment[i],
+               stickiness$pct_stayed[i],
+               stickiness$wave1_treatment[i]))
+ }
Entertainment: 33.1% stayed with Entertainment
Fox: 37.1% stayed with Fox
MSNBC: 31.7% stayed with MSNBC
> 
> 
> # Stacked bar chart
> stacked_bar <- ggplot(flow_data, 
+                       aes(x = wave1_treatment, y = pct_within_treatment, fill = wave2_choice)) +
+   geom_bar(stat = "identity", position = "stack") +
+   geom_text(aes(label = sprintf("%.0f%%", pct_within_treatment)),
+             position = position_stack(vjust = 0.5),
+             size = 3.5,
+             color = "white") +
+   scale_fill_manual(values = c("Entertainment" = "#808080",
+                                "Fox" = "#A31F34", 
+                                "MSNBC" = "#315485")) +
+   scale_y_continuous(labels = percent_format(scale = 1)) +
+   labs(
+        x = "Wave 1 Treatment (Forced Exposure)",
+        y = "Percentage",
+        fill = "Wave 2 Choice\n(Free Choice)") +
+   theme_minimal() +
+   theme(legend.position = "right",
+         axis.text = element_text(size = 11),
+         axis.title = element_text(size = 12),
+         plot.title = element_text(size = 14, face = "bold"))
> ggsave("Output/figA3_wave2_choice_stacked_bar.pdf", stacked_bar, width = 8, height = 6)
> 
> 
> 
> 
> proc.time()
   user  system elapsed 
  3.619   0.181   3.667 
